title: “Geography 176A” author: “Van Jackson” subtitle: ‘Lab 04: Tessalations, Point in Polygon’ output: html_document: theme: journal

#Libraries

# SPDS
library(tidyverse) # data wrangling
## ── Attaching packages ─────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.1
## ✓ tidyr   1.1.1     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(readxl)    # import xlsx data
library(sf)        # Working with vector data
## Linking to GEOS 3.8.1, GDAL 3.1.1, PROJ 6.3.1
library(rmapshaper)# Simplify geometries
## Registered S3 method overwritten by 'geojsonlint':
##   method         from 
##   print.location dplyr
library(units)     # manage your units
## udunits system database from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/units/share/udunits
#Data
library(USAboundaries) # county boundaries
#Visualization
library(gghighlight) # ggplot conditional highlighting
library(knitr) # table generation
library(kableExtra) # making tables pretty
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows

#Question 1

#1.1
CONUS = USAboundaries::us_counties(resolution = "low") %>% 
  filter(state_name != "Hawaii") %>% 
  filter(state_name != "Alaska") %>% 
  filter(state_name != "Puerto Rico") %>% 
  st_transform(5070)

CONUS_u = st_union(CONUS)
CONUS_simp = ms_simplify(CONUS_u, keep = .05)

CONUSpnts = mapview:: npts(CONUS)
simppts = mapview::npts(CONUS_simp)
#1.2
county_centroid = st_centroid(CONUS) %>% 
  st_combine() 
## Warning in st_centroid.sf(CONUS): st_centroid assumes attributes are constant
## over geometries of x
#1.3 -1.5 Tessalations
#Voronoi
v_grid = st_voronoi(county_centroid) %>% 
  st_cast() %>% 
  st_as_sf()  %>% 
  mutate(id = 1:n())

#Triangulated
t_grid = st_triangulate(county_centroid) %>% 
  st_cast() %>% 
  st_as_sf() %>% 
  mutate(id = 1:n())

#Grid Coverage
sq_grid = st_make_grid(CONUS_simp, n = c(70, 50)) %>% 
  st_cast() %>% 
  st_as_sf() %>% 
  mutate(id = 1:n())
  
#Hexagonal Coverage
hex_grid = st_make_grid(CONUS_simp, n = c(50,70), square = FALSE) %>% 
  st_cast() %>% 
  st_as_sf() %>% 
  mutate(id = 1:n())
#1.6 Plot
plot_tess = function(data, title)
{ggplot() +
    geom_sf(data = data, fill = "white", col = "navy", size = .2) +
    theme_void() +
    labs(title = title, caption = paste("This tesselation contains:", nrow(data), "tiles")) +
    theme(plot.title = element_text(hjust = .5, color = "black", face = "bold"))}

#Original
plot_tess(data = CONUS_simp, "Original County Data")

#Voronoi
v_grid = st_intersection(v_grid, st_union(CONUS_simp))
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
plot_tess(v_grid, "Voronoi") +
  geom_sf(data = county_centroid, col = "red", size = 0.2)

#Triangulated
t_grid = st_intersection(t_grid, st_union(CONUS_simp))
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
plot_tess(t_grid, "Triangulated") +
  geom_sf(data = county_centroid, col = "red", size = 0.2)

#Gridded
plot_tess(sq_grid, "Square Coverage")

#Hexagonal
plot_tess(hex_grid, "Hexogonal Coverage")

#Question 2

#2.1
sf_to_df = function(sf_object, description) {
  area_sf= st_area(sf_object) %>% 
    set_units('km^2') %>% 
    drop_units()
  area_df = data.frame(tesselation = description, features = length(area_sf), mean_area = mean(area_sf), std = sd(area_sf), total_area = sum(area_sf))

  return(area_df)
}

description= "test"
#2.2
voroni_df = sf_to_df(v_grid, "voroni tesselation")
tri_df = sf_to_df(t_grid, "triangulation tesselation")
sq_df = sf_to_df(sq_grid, "square grid tesselation")
hex_df = sf_to_df(hex_grid, "hexagonal tesselation")
counties_df = sf_to_df(CONUS, "county")
#2.3
tess_summary = bind_rows(
  sf_to_df(t_grid, "triangulation"),
  sf_to_df(v_grid, "voroni"),
  sf_to_df(sq_grid, "square grid"),
  sf_to_df(hex_grid, "hexagonal grid"),
  sf_to_df(sf_object= CONUS, "county"))
#2.4
?mean()

knitr::kable(tess_summary, caption = "US county tesselations", col.names = c("Tesselations", "Number of features", "Mean Area", "Standard Deviation", "Total area"))
US county tesselations
Tesselations Number of features Mean Area Standard Deviation Total area
triangulation 6195 1253.691 1578.647 7766614
voroni 3106 2525.425 2886.661 7843970
square grid 2256 3761.443 0.000 8485816
hexagonal grid 1175 7368.799 0.000 8658339
county 3108 2521.745 3404.325 7837583

#2.5 Comment - As you move from triangulation at the top of the chart to the original county data atthe bottom, the area moves from smallest to largest. Each individual tesselation returns a specific part of the US and counties. The point in polygon will be affected by the different areas returned by each of the tesselations.

#Question 3

#3.1
NID2019_U = read_excel("/users/owner1/github/geog-176A-labs/data/NID2019_U.xlsx") %>% 
  filter(!is.na(LATITUDE)) %>% 
  st_as_sf(coords = c("LONGITUDE", "LATITUDE"), crs = 4326) %>% 
  st_transform(5070)
#3.2

point_in_polygon = function(points, polygon, group){
  st_join(polygon, points) %>% 
    st_drop_geometry() %>% 
    count(get(group)) %>% 
    setNames(c(group, "n")) %>% 
    left_join(polygon, by = group) %>% 
    st_as_sf()
}
#3.3
voroni_pip = point_in_polygon(NID2019_U, polygon = v_grid, group = "id")
triangulation_pip = point_in_polygon(NID2019_U, t_grid, group = "id")
grid_pip = point_in_polygon(NID2019_U, sq_grid, group = "id")
hexagonal_pip = point_in_polygon(NID2019_U, hex_grid, group = "id")
county_pip = point_in_polygon(NID2019_U, CONUS, group = "geoid")
#3.4
plot_pip = function(data, title){
  ggplot() +
    geom_sf(data = data, aes(fill = n), col = NA, alpha = .9, size= .2) +
    scale_fill_gradient(low = "white", high = "blue") +
    theme_void() +
    labs(title = "dams in the US", caption = paste0(sum(data$n), "dams represented")) +
    theme(legend.position = "none", plot.title = element_text(face = "bold", color = "blue", hjust = .5, size = 24))
}

plot_pip(voroni_pip, "Voroni Point in Polygon")

plot_pip(county_pip, "Raw County Data Point in Polygon")

#3.5
voroni_pip = point_in_polygon(NID2019_U, polygon = v_grid, group = "id")
triangulation_pip = point_in_polygon(NID2019_U, t_grid, group = "id")
grid_pip = point_in_polygon(NID2019_U, sq_grid, group = "id")
hexagonal_pip = point_in_polygon(NID2019_U, hex_grid, group = "id")
county_pip = point_in_polygon(NID2019_U, CONUS, group = "geoid")

plot_pip(voroni_pip)

plot_pip(triangulation_pip)

plot_pip(grid_pip)

plot_pip(hexagonal_pip)

plot_pip(county_pip)

The hex and square tesselations provided a different result than the others. This is because the others were plotted by county and the hex and squares were plotted under equal ratios of area. Moving forward I am only going to use the hexagonal tesselation. It was either that or square tesselation becasue of the fact that it takes into account the area and size rather than just the county.

#Question 4

#4.1
fireprot_dams =  point_in_polygon(filter(NID2019_U, grepl("P", NID2019_U$PURPOSES)), hex_grid, "id")
watersup_dams =  point_in_polygon(filter(NID2019_U, grepl("S", NID2019_U$PURPOSES)), hex_grid, "id")
hydroelec_dams =  point_in_polygon(filter(NID2019_U, grepl("H", NID2019_U$PURPOSES)), hex_grid, "id")
irrig_dams =  point_in_polygon(filter(NID2019_U, grepl("I", NID2019_U$PURPOSES)), hex_grid, "id")
#4.2
plot_pip2 = function(data, text){
  ggplot() +
    geom_sf(data = data, aes(fill = n), alpha = .9, size = .2) +
    gghighlight(n > mean(n) +sd(n)) +
    viridis::scale_fill_viridis() +
    theme_void() +
    theme(plot.title = element_text(face = "bold", color = "black", size = 24), plot.subtitle = element_text(size = 12),
          plot.caption = element_text(face = "bold", size = 12), legend.title = element_text(face = "bold"),
          legend.text = element_text(face = "bold")) +
    labs(title = text,
         subtitle = "Data Source: National Inventory of Dams",
         fill = "Number of dams",
         caption = paste0(sum(data$n), "dams")) +
    theme(aspect.ratio = .5)
}
#4.3
plot_pip2(fireprot_dams, "Fire Protection Dams")

plot_pip2(watersup_dams, "Water Suppy Dams")

plot_pip2(hydroelec_dams, "Hydroelectric Dams")

plot_pip2(irrig_dams, "Irrigation Dams")

#Question 4 Comments Fire protection dams are mainly central US. Possibly due to the drier climates which allow for fires to start easier. Water supply dams are scattered throughout the east, central, and west US; located on bigger water supplys.Hydroelectric dams are also scattered but also seem to be most dense in the Northeastern US. Irrigation dams are found more on the west US; this oculd be due to the fact that the western US does not receive nearly as much rainfall than Eastern US